We have been asked by a reviewer to provide details on why we did what we did in terms of clustering. Here, I want to try out a different clustering approach to see if we get overlapping clustering and how sensitive the cluster borders are to different paramters.
I thought I could try and use the Seurat workflow and ‘pretend’ that the magic table is sc RNAseq data. I have mainly been following this tutorial https://satijalab.org/seurat/articles/pbmc3k_tutorial.html
Loading libraries
library(Seurat)
Loading required package: SeuratObject
Loading required package: sp
‘SeuratObject’ was built under R 4.4.0 but the current version is 4.4.1; it is recomended that you
reinstall ‘SeuratObject’ as the ABI for R may have changed
Attaching package: ‘SeuratObject’
The following objects are masked from ‘package:base’:
intersect, t
Registered S3 methods overwritten by 'htmltools':
method from
print.html tools:rstudio
print.shiny.tag tools:rstudio
print.shiny.tag.list tools:rstudio
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
library(tidyverse)
── Attaching core tidyverse packages ─────────────────────────────────────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2 ── Conflicts ───────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
Importing the interaction pair data
read_tsv("../data/umap_mark_data_uncapped.txt") -> marks_import
Rows: 251254 Columns: 147── Column specification ───────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (7): interaction_ID, P_ID, PIR_ID, P_gene, PIR_gene, cluster_ID, cluster_group
dbl (140): UMAP1, UMAP2, CHiC_DNMT_KO_EpiLC_D2, CHiC_DNMT_KO_ES_D0, CHiC_DNMT_WT_EpiLC_D2, CHiC_DNMT_WT_ES_...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(marks_import)
colnames(marks_import)
[1] "interaction_ID" "UMAP1" "UMAP2"
[4] "P_ID" "PIR_ID" "P_gene"
[7] "PIR_gene" "CHiC_DNMT_KO_EpiLC_D2" "CHiC_DNMT_KO_ES_D0"
[10] "CHiC_DNMT_WT_EpiLC_D2" "CHiC_DNMT_WT_ES_D0" "CHiC_TET_KO_EpiLC_D2"
[13] "CHiC_TET_KO_ES_D0" "CHiC_TET_WT_EpiLC_D2" "CHiC_TET_WT_ES_D0"
[16] "P_mC_DNMT_WT_ES_D0" "P_mC_DNMT_WT_EpiLC_D2" "P_mC_TET_WT_ES_D0"
[19] "P_mC_TET_WT_EpiLC_D2" "P_mC_TET_KO_ES_D0" "P_mC_TET_KO_EpiLC_D2"
[22] "PIR_mC_DNMT_WT_ES_D0" "PIR_mC_DNMT_WT_EpiLC_D2" "PIR_mC_TET_WT_ES_D0"
[25] "PIR_mC_TET_WT_EpiLC_D2" "PIR_mC_TET_KO_ES_D0" "PIR_mC_TET_KO_EpiLC_D2"
[28] "P_hmC_TET_WT_ES_D0" "P_hmC_TET_WT_EpiLC_D2" "P_hmC_DNMT_WT_EpiLC_D2"
[31] "P_ATAC_DNMT_KO_EpiLC_D2" "P_ATAC_DNMT_KO_ES_D0" "P_ATAC_DNMT_WT_EpiLC_D2"
[34] "P_ATAC_DNMT_WT_ES_D0" "P_ATAC_TET_KO_EpiLC_D2" "P_ATAC_TET_KO_ES_D0"
[37] "P_ATAC_TET_WT_EpiLC_D2" "P_ATAC_TET_WT_ES_D0" "P_CTCF_DNMT_KO_EpiLC_D2"
[40] "P_CTCF_DNMT_KO_ES_D0" "P_CTCF_DNMT_WT_EpiLC_D2" "P_CTCF_DNMT_WT_ES_D0"
[43] "P_CTCF_TET_KO_EpiLC_D2" "P_CTCF_TET_KO_ES_D0" "P_CTCF_TET_WT_EpiLC_D2"
[46] "P_CTCF_TET_WT_ES_D0" "P_H3K27ac_DNMT_KO_EpiLC_D2" "P_H3K27ac_DNMT_KO_ES_D0"
[49] "P_H3K27ac_DNMT_WT_EpiLC_D2" "P_H3K27ac_DNMT_WT_ES_D0" "P_H3K27ac_TET_KO_EpiLC_D2"
[52] "P_H3K27ac_TET_KO_ES_D0" "P_H3K27ac_TET_WT_EpiLC_D2" "P_H3K27ac_TET_WT_ES_D0"
[55] "P_H3K27me3_DNMT_KO_EpiLC_D2" "P_H3K27me3_DNMT_KO_ES_D0" "P_H3K27me3_DNMT_WT_EpiLC_D2"
[58] "P_H3K27me3_DNMT_WT_ES_D0" "P_H3K27me3_TET_KO_EpiLC_D2" "P_H3K27me3_TET_KO_ES_D0"
[61] "P_H3K27me3_TET_WT_EpiLC_D2" "P_H3K27me3_TET_WT_ES_D0" "P_H3K4me1_DNMT_KO_EpiLC_D2"
[64] "P_H3K4me1_DNMT_KO_ES_D0" "P_H3K4me1_DNMT_WT_EpiLC_D2" "P_H3K4me1_DNMT_WT_ES_D0"
[67] "P_H3K4me1_TET_KO_EpiLC_D2" "P_H3K4me1_TET_KO_ES_D0" "P_H3K4me1_TET_WT_EpiLC_D2"
[70] "P_H3K4me1_TET_WT_ES_D0" "P_H3K4me3_DNMT_KO_EpiLC_D2" "P_H3K4me3_DNMT_KO_ES_D0"
[73] "P_H3K4me3_DNMT_WT_EpiLC_D2" "P_H3K4me3_DNMT_WT_ES_D0" "P_H3K4me3_TET_KO_EpiLC_D2"
[76] "P_H3K4me3_TET_KO_ES_D0" "P_H3K4me3_TET_WT_EpiLC_D2" "P_H3K4me3_TET_WT_ES_D0"
[79] "P_Input_DNMT_KO_EpiLC_D2" "P_Input_DNMT_KO_ES_D0" "P_Input_DNMT_WT_EpiLC_D2"
[82] "P_Input_DNMT_WT_ES_D0" "P_Input_TET_KO_EpiLC_D2" "P_Input_TET_KO_ES_D0"
[85] "P_Input_TET_WT_EpiLC_D2" "P_Input_TET_WT_ES_D0" "PIR_hmC_TET_WT_ES_D0"
[88] "PIR_hmC_TET_WT_EpiLC_D2" "PIR_hmC_DNMT_WT_EpiLC_D2" "PIR_ATAC_DNMT_KO_EpiLC_D2"
[91] "PIR_ATAC_DNMT_KO_ES_D0" "PIR_ATAC_DNMT_WT_EpiLC_D2" "PIR_ATAC_DNMT_WT_ES_D0"
[94] "PIR_ATAC_TET_KO_EpiLC_D2" "PIR_ATAC_TET_KO_ES_D0" "PIR_ATAC_TET_WT_EpiLC_D2"
[97] "PIR_ATAC_TET_WT_ES_D0" "PIR_CTCF_DNMT_KO_EpiLC_D2" "PIR_CTCF_DNMT_KO_ES_D0"
[100] "PIR_CTCF_DNMT_WT_EpiLC_D2" "PIR_CTCF_DNMT_WT_ES_D0" "PIR_CTCF_TET_KO_EpiLC_D2"
[103] "PIR_CTCF_TET_KO_ES_D0" "PIR_CTCF_TET_WT_EpiLC_D2" "PIR_CTCF_TET_WT_ES_D0"
[106] "PIR_H3K27ac_DNMT_KO_EpiLC_D2" "PIR_H3K27ac_DNMT_KO_ES_D0" "PIR_H3K27ac_DNMT_WT_EpiLC_D2"
[109] "PIR_H3K27ac_DNMT_WT_ES_D0" "PIR_H3K27ac_TET_KO_EpiLC_D2" "PIR_H3K27ac_TET_KO_ES_D0"
[112] "PIR_H3K27ac_TET_WT_EpiLC_D2" "PIR_H3K27ac_TET_WT_ES_D0" "PIR_H3K27me3_DNMT_KO_EpiLC_D2"
[115] "PIR_H3K27me3_DNMT_KO_ES_D0" "PIR_H3K27me3_DNMT_WT_EpiLC_D2" "PIR_H3K27me3_DNMT_WT_ES_D0"
[118] "PIR_H3K27me3_TET_KO_EpiLC_D2" "PIR_H3K27me3_TET_KO_ES_D0" "PIR_H3K27me3_TET_WT_EpiLC_D2"
[121] "PIR_H3K27me3_TET_WT_ES_D0" "PIR_H3K4me1_DNMT_KO_EpiLC_D2" "PIR_H3K4me1_DNMT_KO_ES_D0"
[124] "PIR_H3K4me1_DNMT_WT_EpiLC_D2" "PIR_H3K4me1_DNMT_WT_ES_D0" "PIR_H3K4me1_TET_KO_EpiLC_D2"
[127] "PIR_H3K4me1_TET_KO_ES_D0" "PIR_H3K4me1_TET_WT_EpiLC_D2" "PIR_H3K4me1_TET_WT_ES_D0"
[130] "PIR_H3K4me3_DNMT_KO_EpiLC_D2" "PIR_H3K4me3_DNMT_KO_ES_D0" "PIR_H3K4me3_DNMT_WT_EpiLC_D2"
[133] "PIR_H3K4me3_DNMT_WT_ES_D0" "PIR_H3K4me3_TET_KO_EpiLC_D2" "PIR_H3K4me3_TET_KO_ES_D0"
[136] "PIR_H3K4me3_TET_WT_EpiLC_D2" "PIR_H3K4me3_TET_WT_ES_D0" "PIR_Input_DNMT_KO_EpiLC_D2"
[139] "PIR_Input_DNMT_KO_ES_D0" "PIR_Input_DNMT_WT_EpiLC_D2" "PIR_Input_DNMT_WT_ES_D0"
[142] "PIR_Input_TET_KO_EpiLC_D2" "PIR_Input_TET_KO_ES_D0" "PIR_Input_TET_WT_EpiLC_D2"
[145] "PIR_Input_TET_WT_ES_D0" "cluster_ID" "cluster_group"
Retaining only the columns that should go into clustering
marks_import %>%
select(-contains("UMAP"), -P_ID, -PIR_ID, -P_gene, -PIR_gene, -contains("Input"), -cluster_ID, -cluster_group) %>% # the labels and marks I don't want
select(-contains("DNMT"), -contains("ES"), -contains("KO")) -> marks # limiting to Epi and TET and WT
head(marks)
Transposing this so it can be made into a Seurat object
marks %>%
column_to_rownames(var = "interaction_ID") %>%
t() %>%
as.data.frame() -> marks_transposed
head(marks_transposed)
Making this into a Seurat object
CreateSeuratObject(counts = marks_transposed, assay = "mark") -> marks_Seurat
Warning: Feature names cannot have underscores ('_'), replacing with dashes ('-')Warning: Data is of class data.frame. Coercing to dgCMatrix.
LogNormalising the values
marks_normalised <- NormalizeData(marks_Seurat)
Normalizing layer: counts
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Scaling the data
all.marks <- rownames(marks_normalised)
marks_scaled <- ScaleData(marks_normalised, features = all.marks)
Centering and scaling data matrix
|
| | 0%
|
|==================================================================================================================================================| 100%
performing linear dimensional reduction
marks_PCA <- RunPCA(marks_scaled, features = all.marks)
Warning: You're computing too large a percentage of total singular values, use a standard svd instead.Warning: Requested number is larger than the number of available items (17). Setting to 17.Warning: Requested number is larger than the number of available items (17). Setting to 17.Warning: Requested number is larger than the number of available items (17). Setting to 17.Warning: Requested number is larger than the number of available items (17). Setting to 17.Warning: Requested number is larger than the number of available items (17). Setting to 17.PC_ 1
Positive: P-H3K27ac-TET-WT-EpiLC-D2, P-H3K4me1-TET-WT-EpiLC-D2, P-ATAC-TET-WT-EpiLC-D2, P-H3K4me3-TET-WT-EpiLC-D2, PIR-H3K27ac-TET-WT-EpiLC-D2, PIR-H3K4me1-TET-WT-EpiLC-D2, PIR-ATAC-TET-WT-EpiLC-D2, PIR-H3K4me3-TET-WT-EpiLC-D2
Negative: P-mC-TET-WT-EpiLC-D2, PIR-mC-TET-WT-EpiLC-D2, PIR-H3K27me3-TET-WT-EpiLC-D2, P-H3K27me3-TET-WT-EpiLC-D2, CHiC-TET-WT-EpiLC-D2, P-hmC-TET-WT-EpiLC-D2, PIR-hmC-TET-WT-EpiLC-D2, PIR-CTCF-TET-WT-EpiLC-D2
PC_ 2
Positive: PIR-H3K4me3-TET-WT-EpiLC-D2, P-mC-TET-WT-EpiLC-D2, PIR-ATAC-TET-WT-EpiLC-D2, PIR-H3K27ac-TET-WT-EpiLC-D2, PIR-H3K4me1-TET-WT-EpiLC-D2, PIR-CTCF-TET-WT-EpiLC-D2, P-hmC-TET-WT-EpiLC-D2, PIR-hmC-TET-WT-EpiLC-D2
Negative: PIR-mC-TET-WT-EpiLC-D2, P-H3K4me3-TET-WT-EpiLC-D2, P-ATAC-TET-WT-EpiLC-D2, P-H3K27ac-TET-WT-EpiLC-D2, P-H3K4me1-TET-WT-EpiLC-D2, P-CTCF-TET-WT-EpiLC-D2, P-H3K27me3-TET-WT-EpiLC-D2, CHiC-TET-WT-EpiLC-D2
PC_ 3
Positive: P-H3K27me3-TET-WT-EpiLC-D2, PIR-H3K27me3-TET-WT-EpiLC-D2, P-hmC-TET-WT-EpiLC-D2, P-CTCF-TET-WT-EpiLC-D2, PIR-CTCF-TET-WT-EpiLC-D2, P-H3K4me1-TET-WT-EpiLC-D2, PIR-hmC-TET-WT-EpiLC-D2, P-mC-TET-WT-EpiLC-D2
Negative: P-H3K27ac-TET-WT-EpiLC-D2, PIR-H3K27ac-TET-WT-EpiLC-D2, PIR-H3K4me3-TET-WT-EpiLC-D2, P-H3K4me3-TET-WT-EpiLC-D2, PIR-H3K4me1-TET-WT-EpiLC-D2, P-ATAC-TET-WT-EpiLC-D2, PIR-ATAC-TET-WT-EpiLC-D2, CHiC-TET-WT-EpiLC-D2
PC_ 4
Positive: PIR-H3K27me3-TET-WT-EpiLC-D2, PIR-CTCF-TET-WT-EpiLC-D2, P-H3K27me3-TET-WT-EpiLC-D2, PIR-ATAC-TET-WT-EpiLC-D2, P-CTCF-TET-WT-EpiLC-D2, P-ATAC-TET-WT-EpiLC-D2, PIR-H3K4me3-TET-WT-EpiLC-D2, CHiC-TET-WT-EpiLC-D2
Negative: PIR-hmC-TET-WT-EpiLC-D2, P-hmC-TET-WT-EpiLC-D2, PIR-mC-TET-WT-EpiLC-D2, PIR-H3K4me1-TET-WT-EpiLC-D2, P-H3K4me1-TET-WT-EpiLC-D2, P-mC-TET-WT-EpiLC-D2, PIR-H3K27ac-TET-WT-EpiLC-D2, P-H3K27ac-TET-WT-EpiLC-D2
PC_ 5
Positive: PIR-hmC-TET-WT-EpiLC-D2, PIR-mC-TET-WT-EpiLC-D2, PIR-H3K27me3-TET-WT-EpiLC-D2, PIR-CTCF-TET-WT-EpiLC-D2, PIR-H3K4me1-TET-WT-EpiLC-D2, CHiC-TET-WT-EpiLC-D2, PIR-ATAC-TET-WT-EpiLC-D2, P-H3K27me3-TET-WT-EpiLC-D2
Negative: P-hmC-TET-WT-EpiLC-D2, P-H3K4me1-TET-WT-EpiLC-D2, P-mC-TET-WT-EpiLC-D2, P-CTCF-TET-WT-EpiLC-D2, P-ATAC-TET-WT-EpiLC-D2, PIR-H3K4me3-TET-WT-EpiLC-D2, P-H3K27ac-TET-WT-EpiLC-D2, PIR-H3K27ac-TET-WT-EpiLC-D2
The warnings here are ok, it just complains that it doesn’t have enough dimensions and automatically reduces them to 17.
DimPlot(marks_PCA, reduction = "pca", raster = FALSE)
#DimHeatmap(marks_PCA, dims = 1:5, balanced = TRUE)
ElbowPlot(marks_PCA)
Warning: The object only has information for 16 reductions
It’s hard to determine a cutoff for PCAs to use, I tried 8, 9 and 12 to check the clustering.
Clustering
-> KNN graph based on euclidean distance in PCA space with edge weights between samples by Jaccard similarity (overlap in local neighbourhoods) -> Louvain clustering
marks_KNN <- FindNeighbors(marks_PCA, dims = 1:12)
Computing nearest neighbor graph
Computing SNN
marks_Louvain <- FindClusters(marks_KNN, resolution = 1)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 251254
Number of edges: 5958936
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8286
Number of communities: 31
Elapsed time: 180 seconds
I ran several iterations with different parameters to check what made the most sense with the UMAP representation. dim 9, resolution 0.8 dim 9, resolution 1.2 dim 15, resolution 1 dim 12, resolution 1 (this is the one I settled on)
Look at cluster IDs
Idents(marks_Louvain) %>%
as.data.frame() %>%
rownames_to_column(var = "interaction_ID") %>%
rename(Louvain_cluster_ID = ".") -> Louvain_clusters
head(Louvain_clusters)
Now incorporating this info in the original table
marks_import %>%
left_join(Louvain_clusters) -> marks_2
Joining with `by = join_by(interaction_ID)`
head(marks_2)
Plotting the original clusters
marks_2 %>%
slice_sample(prop = 0.1) %>%
ggplot(aes(UMAP1, UMAP2, colour = cluster_ID)) +
geom_point(size = 0.5) +
theme_bw(base_size = 14) +
ggtitle("EM clusters") +
coord_cartesian(xlim = c(-10, 12), ylim = c(-10, 8)) +
theme(aspect.ratio = 1, plot.title = element_text(hjust = 0.5))
Plotting the Louvain clusters
marks_2 %>%
slice_sample(prop = 0.05) %>%
ggplot(aes(UMAP1, UMAP2, colour = Louvain_cluster_ID)) +
geom_point(size = 0.5) +
geom_point(data = marks_2 %>% slice_sample(prop = 0.05) %>% filter(Louvain_cluster_ID == 19), size = 0.5, colour = "black") +
theme_bw(base_size = 14) +
ggtitle("Louvain clusters") +
coord_cartesian(xlim = c(-10, 12), ylim = c(-10, 8)) +
theme(aspect.ratio = 1, plot.title = element_text(hjust = 0.5))
Okay, let’s explore these a bit more.
How many regions per cluster?
marks_2 %>%
group_by(Louvain_cluster_ID) %>%
count()
Looking at marks per cluster
marks_2 %>%
select(-UMAP1, -UMAP2, -P_ID, -PIR_ID, -P_gene, -PIR_gene, -contains("DNMT"), -contains("ES"), -contains("Input"), -contains("KO"), -contains("CHiC")) %>%
select(interaction_ID, cluster_ID, cluster_group, Louvain_cluster_ID, everything()) %>%
pivot_longer(5:last_col(), names_to = "condition", values_to = "values") %>%
separate(condition, into = c("region", "mark")) -> marks_TET_Epi_long
Warning: Expected 2 pieces. Additional pieces discarded in 4020064 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
The warning here is fine, I deliberately discard the additional pieces.
Clusters to exclude (the ones with fewer than 1000 data points)
excluded_clusters <- c("21", "22", "23", "24", "25", "26", "27", "28", "29", "30")
Plotting boxplots
# marks_TET_Epi_long %>%
# filter(mark != "mC") %>%
# filter(!Louvain_cluster_ID %in% excluded_clusters) %>%
# ggplot(aes(Louvain_cluster_ID, values, fill = region)) +
# geom_boxplot(outlier.shape = NA) +
# ylim(0, 8) +
# theme_bw(base_size = 14) +
# facet_grid(rows = "mark")
Boxplots per cluster
marks_TET_Epi_long %>%
filter(mark != "mC") %>%
filter(!Louvain_cluster_ID %in% excluded_clusters) %>%
ggplot(aes(mark, values, fill = region)) +
geom_boxplot(outlier.shape = NA) +
ylim(0, 8) +
theme_bw(base_size = 14) +
facet_grid(rows = "Louvain_cluster_ID")
I then summarised the clusters according to their marks (and their presumed biological function).
Summarised clusters
cluster_group_names <- c("active P - unmarked PIR",
"active P - low primed E",
"PC P - unmarked PIR",
"inactive P - unmarked PIR",
"inactive P - low primed E, CTCF",
"primed E",
"poised E",
"active P - active P",
"active P - active E"
)
cluster_group_colours <- c("#b785bb", "#53c2b3", "#bf6c6c", "#b2b5b5", "#741113", "#eaca48", "#ca3526", "#91c740", "#252d37")
names(cluster_group_colours) <- cluster_group_names
Plotting original summarised marks
marks_2 %>%
mutate(cluster_group = gsub("inactive P - low primed E", "active P - low primed E", cluster_group)) %>%
mutate(cluster_group = gsub("active P - low primed E, CTCF", "inactive P - low primed E, CTCF", cluster_group)) %>%
slice_sample(prop = 0.1) %>%
ggplot(aes(UMAP1, UMAP2, colour = cluster_group)) +
geom_point(size = 0.5) +
theme_bw(base_size = 14) +
ggtitle("EM clusters") +
coord_cartesian(xlim = c(-10, 12), ylim = c(-10, 8)) +
scale_colour_manual(values = cluster_group_colours) +
theme(aspect.ratio = 1, plot.title = element_text(hjust = 0.5)) -> UMAP_EM_clusters
UMAP_EM_clusters
Summarising Louvain clusters (This one is for dim 12, resolution 1)
P_unmarkedPIR <- c("2", "5", "13", "15")
active_lowprimed <- c("9", "16")
PC_P_unmarked <- c("0", "12", "17")
inactive_unmarked <- c("3", "4", "7", "9", "10", "14")
inactive_lowprimedCTCF <- c("1", "19")
primed <- c("20", "6")
poised <- c( "18")
p_p <- c("11")
active_activeE <- c("8")
marks_2 %>%
filter(!Louvain_cluster_ID %in% excluded_clusters) %>%
mutate(Louvain_cluster_group = case_when(Louvain_cluster_ID %in% P_unmarkedPIR ~ "active P - unmarked PIR",
Louvain_cluster_ID %in% active_lowprimed ~ "active P - low primed E",
Louvain_cluster_ID %in% PC_P_unmarked ~ "PC P - unmarked PIR",
Louvain_cluster_ID %in% inactive_unmarked ~ "inactive P - unmarked PIR",
Louvain_cluster_ID %in% inactive_lowprimedCTCF ~ "inactive P - low primed E, CTCF",
Louvain_cluster_ID %in% primed ~ "primed E",
Louvain_cluster_ID %in% poised ~ "poised E",
Louvain_cluster_ID %in% p_p ~ "active P - active P",
Louvain_cluster_ID %in% active_activeE ~ "active P - active E")) -> marks_3
Plotting Louvain summarised marks
marks_3 %>%
slice_sample(prop = 0.05) %>%
ggplot(aes(UMAP1, UMAP2, colour = Louvain_cluster_group)) +
geom_point(size = 0.5) +
theme_bw(base_size = 14) +
ggtitle("Louvain clusters") +
coord_cartesian(xlim = c(-10, 12), ylim = c(-10, 8)) +
scale_colour_manual(values = cluster_group_colours) +
theme(aspect.ratio = 1, plot.title = element_text(hjust = 0.5), legend.title = element_blank()) -> UMAP_Louvain_clusters
UMAP_Louvain_clusters
Exporting
ggsave("../output/plots/UMAP_Louvain_clusters.png", UMAP_Louvain_clusters, height = 4, width = 8, units = "in")
ggsave("../output/plots/UMAP_Louvain_clusters.svg", UMAP_Louvain_clusters, height = 4, width = 8, units = "in")
Error in loadNamespace(x) : there is no package called ‘svglite’
Some clusters are strong and essentially pop up in all cluster conditions:
Exporting the clusters